This Mark Down file contains code and results evaluating the effect of wildfire smoke on cardiopulmonary morbidity and mortality along the Colorado front range communities from 2010 to 2015. The hypothesis for this study is that we will see an increase in the risk of cardiopulmonary morbidity and mortality associated with an increase in PM2.5 due to wildfire smoke.
Study bounding box.
# clip by bbox function ------
bbox_clip <- function(sf, bbox) {
# find the CRS of the sf object
crs <- sf::st_crs(sf)$proj4string
# create matrix
x <- c(bbox[1], bbox[1], bbox[3], bbox[3], bbox[1])
y <- c(bbox[2], bbox[4], bbox[4], bbox[2], bbox[2])
coords <- matrix(cbind(x, y), ncol=2)
# create polygon and assign same coord crs as sf object
coords_poly <- sp::Polygon(coords)
bbox_poly <- sp:: SpatialPolygons(list(sp::Polygons(list(coords_poly),
ID = "bbox")), proj4string = sp::CRS(crs))
# convert to sf feature
bbox_sf <- st_as_sf(bbox_poly)
# clip sf object
clipped_sf <- sf[bbox_sf,]
return(clipped_sf)
}
# clipping bounding box
study_bbox <- st_bbox(c(xmin=-105.3, xmax=-104.5, ymax=41, ymin = 38))
Reading in county shapefiles.
# county subset
county_sub_name <- c("Larimer", "Weld", "Boulder", "Broomfield", "Adams",
"Denver", "Jefferson", "Arapahoe", "Douglas", "El Paso",
"Pueblo")
# read in county shapefile and subset to only colorado fips
co_county_sf <- st_read("../../data/shapefiles/us_county",
layer = "us_county") %>%
# limit to colorado
filter(STATEFP == "08") %>%
# create fips variable
mutate(fips = paste0(STATEFP, COUNTYFP))
## Reading layer `us_county' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/us_county' using driver `ESRI Shapefile'
## Simple feature collection with 3108 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7631 ymin: 24.5231 xmax: -66.9499 ymax: 49.38436
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
# save wgs84 crs
wgs <- st_crs(co_county_sf)
# county clip
county_clip <- bbox_clip(co_county_sf, study_bbox)
# extract county names
county_text <- county_clip %>%
st_transform(wgs) %>%
st_centroid() %>%
st_coordinates() %>%
cbind(., as.character(county_clip$NAME)) %>%
as_data_frame() %>%
rename(lon = X, lat = Y, county = V3) %>%
mutate(lon = as.numeric(lon), lat = as.numeric(lat))
Reading in Front Range Grid points that will define who is included in the study area.
# read in front range grid csv
frontrange_grid <- read_csv('../../data/smoke/front_range_grid.csv')
# read in grid simple features and limit to the front range grid
study_grid <- read_sf('../../data/shapefiles/co_krig_grid/', crs = wgs) %>%
filter(GRID_ID %in% frontrange_grid$GRID_ID)
nrow(frontrange_grid)
## [1] 75
# print out bounding box for front range grid
ggplot(frontrange_grid,
aes(x=lon, y=lat, label=paste(round(lon,2), round(lat,2), sep=', '))) +
geom_text(size = 4) +
xlim(-105.5,-104.4)
Ideally I’d like to give the exact coordinates for the bounding box for Kate, but I don’t remember how to exacly subset. I’ve printed out the Lon/Lat coords of the grid cells I used in air pollution measures for the front range. Keep in mind I projected it as a WGS 84 to align with other shapefiles. But it looks like the bounding box cells would be Top left: -105.39, 40.63 Top right: -104.62, 40.67 Bottom left: -105.24, 38.59 Bottom right: -104.49, 38.62
Interstate shapefile limited to I-25 and I-70.
# read in colorado roads shapefile
interstate <- st_read(paste0("../../data/shapefiles/tl_2015_08_prisecroads"),
layer = "tl_2015_08_prisecroads") %>%
# filter to I25 lines
filter(RTTYP == "I") %>%
st_transform(crs = wgs)
## Reading layer `tl_2015_08_prisecroads' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/tl_2015_08_prisecroads' using driver `ESRI Shapefile'
## Simple feature collection with 3230 features and 4 fields
## geometry type: LINESTRING
## dimension: XY
## bbox: xmin: -109.0602 ymin: 36.99251 xmax: -102.0417 ymax: 41.00307
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
# interstate clip
i_clip <- bbox_clip(interstate, study_bbox)
Reading in city boundary simple features.
# read in city polygons 2010
city <- st_read("../../data/shapefiles/Colorado_City_Point_Locations/",
layer = "Colorado_City_Point_Locations") %>%
st_transform(crs = wgs)
## Reading layer `Colorado_City_Point_Locations' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/Colorado_City_Point_Locations' using driver `ESRI Shapefile'
## Simple feature collection with 587 features and 8 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -109.0146 ymin: 37.00304 xmax: -102.0804 ymax: 40.98833
## epsg (SRID): 4269
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
# limit cities
city_points <- city %>%
filter(NAME %in% c("FORT COLLINS", "PUEBLO", "GREELEY", "BOULDER", "DENVER",
"COLORADO SPRINGS")) %>%
mutate(city = stringr::str_to_title(NAME))
Reading 2015 population estimate geotiff for Colorado. I made this file for the ALA project from the SEDAC 2015 global estimate.
# read colorado 2015 population raster
co_pop_2015 <- raster::raster("../../data/shapefiles/2015-ColoradoPopDensity.tif")
# extract bbox of clipped county subset
fr_extent <- raster::extent(st_bbox(county_clip)[c(1,3,2,4)])
# raster
fr_pop_2015 <- raster::crop(co_pop_2015, fr_extent)
# i'm going to create a shapefile/simple features; easier to plot
front_range_pop_sf <- st_as_sf(raster::rasterToPolygons(fr_pop_2015)) %>%
rename(popden = X2015.ColoradoPopDensity) %>%
# filtering to cells > 100
filter(popden > 100)
# cut once more to county shapefile so that populations outside the counties are not present
pop_clip <- front_range_pop_sf[county_clip,]
Study map that will contain an inset map to highlight the area of interest.
Inset map.
# create county inset map
inset_map <- ggplot(co_county_sf) +
geom_sf(fill = "transparent", color = "black", size = 0.1) +
geom_sf(data = study_grid,
fill = 'red',
color = 'transparent',
alpha = 0.5) +
ggtitle("Colorado: Study Area") +
theme(plot.title = element_text(size = 12),
panel.background = element_blank(),
panel.grid.major = element_line(colour = 'transparent'),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
plot.background = element_rect(fill = "transparent",
color = "transparent"))
inset_map
Study map.
# plot map
study_map <-ggplot(pop_clip) +
# start with plot of simple features of populations
geom_sf(aes(fill=popden), color = NA) +
# define aesthetics of poipulation
scale_fill_gradient(name = expression("Population per km"^2),
low = "#26d0ce", high = "#1a2980") +
# plot lines of counties
geom_sf(data = county_clip, color = "black",
fill = "transparent", size = 0.5) +
# plot i-25 and i-70
geom_sf(data = i_clip, aes(color = "Interstate"), show.legend = "line") +
# plot study grid
geom_sf(data = study_grid, aes(color = "Study Grid"), fill = "transparent",
size = 0.1, show.legend = "line") +
# custom colors for interstate and study grid
scale_color_manual(values = c("Interstate" = "#0f9b0f",
"Study Grid" = "red"),
labels = c("Interstate", "Study Grid"),
name = "Boundary") +
# major city points and names
geom_point(data = city_points, aes(x = LONG, y = LAT), color = "#3f2b96") +
geom_text(data = city_points, aes(x = LONG, y = LAT, label = city),
color = "#3f2b96", size = 4, hjust = 1, vjust = -0.6) +
# theme
theme(panel.background = element_blank(),
panel.grid.major = element_line(colour = 'transparent'),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
legend.key = element_rect(fill = NA, colour = NA, size = 0.25))
study_map
Final map with inset. Using the grid package.I may add the AQS monitors to this map.
# save map
#tiff(filename = "../analysis/figures/study_map.tiff", w=850, h=550)
grid::grid.newpage()
vpmain <- grid::viewport(width = 1, height = 1, x = 0.5, y = 0.5)
vpinset <- grid::viewport(width = 0.25, height = 0.25, x = 0.8, y = 0.85)
print(study_map, vp = vpmain)
print(inset_map, vp = vpinset)
Kate has estimated PM2.5 at 15x15 km grid level using kriged surface site monitors for the entire US from 2010 to 2015. I have limited the gridded PM2.5 to Colorado Front Range study grid. I have also estimated population-weighted PM2.5 for all Colorado counties (although I don’t think I’ll use it for this project). There were concerns about the accuracy of the krig PM in areas outside of the front range communities since most PM stations are on the Front Range.
I am also going to set up this lag function here to create lagged days that will be used in the analysis.
# defining a lag function that I will use later in the distributed lag model
funlag <- function(var, n=5){
var <- enquo(var)
indices <- seq_len(n)
map( indices, ~quo(lag(!!var, !!.x)) ) %>%
set_names(sprintf("%s_lag%d", rlang::quo_text(var), indices))
}
I’d like to find the most likely grid for each county and plot each grid for each county instead of the population-weighted county estimates. I’m going to find the county name for which has the largest proportion of intersection of the grid.
# identifying the county that most of the grid lies in
grid_county_id <- st_intersection(study_grid, county_clip) %>%
dplyr::select(GRID_ID, NAME, fips)
# proportion each grid in county
grid_prop <- as.numeric(st_area(grid_county_id)/st_area(study_grid))
# find the county id that contains the highest proportion for each grid
grid_county_final <- grid_county_id %>%
mutate(prop_int = grid_prop,
GRID_ID = as.character(GRID_ID)) %>%
group_by(GRID_ID) %>%
filter(prop_int == max(prop_int))
# remove geometry
st_geometry(grid_county_final) <- NULL
Consider removing this following section.
# front range fips
fr_fips <- c("08001", "08005", "08013", "08031", "08035", "08041",
"08059", "08069", "08123")
# read pm data
co_pm <- read_csv("../../data/smoke/1015-county_popwt_pm.csv") %>%
# limiting to colorado counties only by reading 1st 2 digs of 5-dig fips code
filter(fips %in% fr_fips) %>%
# renaming variable pm_smk to pm_diff; more accurate description of var
rename(pm_diff = pm_smk) %>%
# us
mutate(county_name = case_when(fips == '08001' ~ 'Adams',
fips == '08005' ~ 'Arapahoe',
fips == '08013' ~ 'Boulder',
fips == '08031' ~ 'Denver',
fips == '08035' ~ 'Douglas',
fips == '08041' ~ 'El Paso',
fips == '08059' ~ 'Jefferson',
fips == '08069' ~ 'Larimer',
fips == '08123' ~ 'Weld'),
day = as.factor(weekdays(date)), # create day of week based on var
weekend = ifelse(day %in% c("Saturday", "Sunday"), 1, 0),
month = as.factor(lubridate::month(date)), # extract month as factor
year = as.factor(lubridate::year(date)), # extract year as factor
season = as.factor(case_when(month %in% c(12, 1, 2) ~ "winter",
month %in% c(3:5) ~ "spring",
month %in% c(6:8) ~ "summer",
month %in% c(9:11)~ "fall")),
# creating binary smoke to require 50% of county with smoke overhead
# and difference between estimate and seasonal background to be >0
# and to be within the month of April to Octoboer
smoke0_hms = ifelse(pm_diff > 0 & month %in% c(5:10) & hms > 0.5,1,0),
smoke5_hms = ifelse(pm_diff > 5 & month %in% c(5:10) & hms > 0.1,1,0),
smoke10_hms = ifelse(pm_diff > 10 & month %in% c(5:10) & hms > 0.1,1,0),
smoke15_hms = ifelse(pm_diff > 15 & month %in% c(5:10) & hms > 0.1,1,0),
# transforming pm kriged estimates to a 10 unit increase in pm
pm = pm_krig/10,
# continuous pm smoke accounting for hms
cpm_smk = ifelse(hms > 0.1 & month %in% c(5:10), pm_diff, 0),
cpm_smk = ifelse(cpm_smk < 0, 0, cpm_smk),
aqi_warning = ifelse(aqi_cat %in%
c('Unhealthy', 'Unhealthy for Sensitive Groups'), 1, 0)) %>%
# sorting by fips and date to estimate lag for each county by date
arrange(fips, date) %>%
# group by fips
group_by(fips) %>%
# apply funlag to create lagged estimates
mutate(., !!!funlag(pm,5), !!!funlag(smoke0_hms,5), !!!funlag(smoke5_hms,5),
!!!funlag(smoke10_hms,5), !!!funlag(smoke15_hms,5),
!!!funlag(temp_f, 5)) %>%
dplyr::select(-state, -month) # removing state and month to bind with casecross
To try and estimate PM2.5 from smoke vs. other sources, I’ve taken the estimated kriged value of PM2.5 and subtracted off the seasonal background of PM2.5 and only consider this difference to be from smoke if there is smoke in the atmospheric column using the HMS grids. For county population-weighted values, I use a rather loose definition of at least 10% of the county with smoke overhead. The grid-level estimate of smoke is probably more reliable. I’ll show some quick diagnostic plots below.
Here is a plot of the population-weighted smoke PM2.5 for each Front Range county.
pm_plot <- ggplot(data=co_pm, aes(x=date, y=cpm_smk)) +
geom_point(size = 0.5, color = "#6441a5") +
facet_wrap(~county_name) +
xlab('Date') +
ylab('County Smoke PM2.5 ug/m^3') +
theme_minimal()
# plot
print(pm_plot)
This definition of smoke seems reasonable. I do have some concerns with the Kriged model in that there isn’t enough stations to capture spatial variations. For example, the 2012 Waldo Canyon Fire likely affected south Front Range counties like El Paso a little after the High Park Fire likely affected northern front range counties like Larmier. Does the county population-weighted time series reflect this? All the smoke peaks across look uniform across Front Range counties. This could be explained by the kriging process if certain sites are really driving the smoothed surfaces.
Maps of the study area and specific fires may help understand the exposure series. I will work on these after some input.
# code chunk that reads in grid pm2.5 and prepares some lagged variables
# for joining with health outcomes data.
# read new grid/wrfgrid key
grid_key <- read_csv('../../data/shapefiles/wrfgrid_key.csv',
col_types = list(GRID_ID = col_character(),
WRFGRID_ID = col_character())) %>%
dplyr::select(-COLX, -ROWY)
# read grid pm
grid_pm <- read_csv('../../data/smoke/1015-grid_pm.csv',
col_types = list(GRID_ID = col_character())) %>%
# create some variables
mutate(pm_diff_g = pm25_grid - sbg_pm_grid,
month = as.factor(lubridate::month(date)), # extract month as factor
gsmk5_hms = ifelse(pm_diff_g > 5 & month %in% c(4:10) & hms_grid == 1,
1,0),
gsmk10_hms = ifelse(pm_diff_g > 10 & month %in% c(4:10) & hms_grid == 1,
1,0),
gsmk15_hms = ifelse(pm_diff_g > 15 & month %in% c(4:10) & hms_grid == 1,
1,0),
# continuous pm smoke accounting for hms
gpm_smk = ifelse(hms_grid == 1 & month %in% c(5:10), pm_diff_g, 0),
gpm_smk = ifelse(gpm_smk < 0, 0, gpm_smk),
# transforming pm smke estimates to a 10 unit increase in pm
gpm_smk10unit = gpm_smk/10) %>%
# sorting by GRID_ID and date to estimate lag for each county by date
arrange(GRID_ID, date) %>%
# group by GRID_ID
group_by(GRID_ID) %>%
# apply funlag to create lagged estimates
mutate(., !!!funlag(gpm_smk10unit,5), !!!funlag(gsmk5_hms,5),
!!!funlag(gsmk10_hms,5), !!!funlag(gsmk15_hms,5),
!!!funlag(temp_f_grid, 5)) %>%
dplyr::select(-month) %>%
# filter to grids in study area
filter(GRID_ID %in% frontrange_grid$GRID_ID) %>%
# join county id
left_join(dplyr::select(grid_county_final, GRID_ID, NAME, fips),
by = 'GRID_ID')
Grid smoke values in each front range county.
# front range fips
fr_fips <- c("08001", "08005", "08013", "08031", "08035", "08041",
"08059", "08069", "08123")
pm_plot <- ggplot(data=filter(grid_pm, fips %in% fr_fips),
aes(x=date, y=gpm_smk, group = GRID_ID)) +
geom_point(size = 0.5, color = "#6441a5") +
facet_wrap(~NAME) +
xlab('Date') +
ylab(expression(paste("Smoke PM"[2.5]," in ", mu,"g/m"^3))) +
theme_minimal()
# plot
print(pm_plot)
Summary statistics of PM2.5 in grid.
grid_pm %>%
ungroup() %>%
summarize(mean_pm25 = mean(pm25_grid), max_pm25 = max(pm25_grid),
min_pm25 = 0)
Wildfire season summary stats.
grid_pm %>%
mutate(month = lubridate::month(date)) %>%
filter(month %in% 5:10) %>%
ungroup() %>%
summarize(mean_pm25 = mean(pm25_grid), max_pm25 = max(pm25_grid),
min_pm25 = 0, mean_smk = mean(gpm_smk), max_smk = max(gpm_smk))
Reading ozone estimates.
ozone <- read_csv('../../data/smoke/1015-frontrange_kriged_o3.csv') %>%
mutate(GRID_ID = as.character(GRID_ID)) %>%
# sorting by GRID_ID and date to estimate lag for each county by date
arrange(GRID_ID, date) %>%
# group by GRID_ID
group_by(GRID_ID) %>%
# apply funlag to create lagged estimates
mutate(., !!!funlag(o3_8hr_max_ppb,5))
I’ve created time-stratified case-crossover data frames for both mortality and hospitalization data provided by CDPHE with reference periods within a month of the observation. The justification for a ‘month’ period is appropriate for mortality I think, as I think it’s reasonable counter factual window in when a person could have died. A longer period like we’ve previously used like the entire wildfire season may not be as appropriate for mortality for a person who is already at risk of death.
# load casecross list
load("../../data/health/1015-co_morbidity_casecross_list.RData")
# load icd9
load("../../data/health/icd9_outcome_vectors.RData")
Cleaning hospitalization data and joining with both grid-level and count-level PM2.5 data. I consider the following inpatient hospitalization events with admitting source via the emergency room or urgent care (as a way to get at acute events): all respiratory, asthma, chronic obstructive pulmonary disease (COPD), acute bronchitis, pneumonia, all cardiovascular events, arrhythmia, cerebrovascular, heart failure, ischemic heart disease, and myocardial infraction (which is a subcategory of IHD).
Some notes for the Colorado hospitalization data is that unlike Washington and Oregon, I have no way of identifying multiple events per person so I am not able to limit to first event and I am assuming each event is independent of others. Also, unlike Washington and Oregon, I’ve decided to go with monthly referent periods rather than the whole wildfire season. Possible suggestion would be to run sensitivity analyses to see if this has an impact.
# extract names
outcome <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
'Heart Failure', 'Ischemic Heart Disease',
'Myocardial Infarction')
# reduce case-crossover list to only summer months and join GRID ID
co_hosp_list <- co_morbidity_cc_list %>%
# desired format to make sure it's right
map(~ mutate(., outcome = as.numeric(as.character(outcome)),
date = as.Date(as.character(date)),
month = as.factor(lubridate::month(date))) %>%
# filter out 2016; I don't have pm data yet
filter(date <= "2015-12-30") %>%
filter(month %in% 5:10) %>%
filter(fips %in% fr_fips) %>%
dplyr::select(-state, -month) %>%
# join with grid key
left_join(grid_key, by = 'WRFGRID_ID') %>%
left_join(co_pm, by = c("fips", "date")) %>%
# left join grid pm
left_join(grid_pm, by = c("GRID_ID", "date")) %>%
left_join(ozone, by = c("GRID_ID", "date")) %>%
# filter to front range grid ids
filter(GRID_ID %in% frontrange_grid$GRID_ID)) %>%
# add outcome name to each dataframe
map2(.x = ., .y = outcome, ~mutate(.x, out_name = .y))
Creating and plotting the time series of respiratory and cardiovascular events for the state of Colorado.
# create time series of hospitalizations
ts_hosp <- co_morbidity_cc_list[c(1,6)] %>%
# using map 2 to add outcome names to each dataframe
map2(., names(co_morbidity_cc_list[c(1,6)]), function(df, name){
counts <- df %>%
filter(outcome == 1) %>%
group_by(date) %>%
summarise(n = n()) %>%
mutate(outcome = name,
date = as.Date(date))
}) %>%
# bind dataframes
map_dfr(.,rbind)
ts_hosp_plot <- ggplot(data=ts_hosp, aes(x=date, y=n)) +
geom_point() +
facet_wrap(~outcome) +
theme_minimal()
ts_hosp_plot
The cardiovascular hospitalization is pretty uniform, with some noticeable dips at the end of the year of 2013 and when ICD9 switches to ICD10 at the end of the time series. I’d say this is probably a systemic thing that has to do with coding or how hospitals recorded events rather than an actual big drop in the rate of CVD hospitalizations. Although may Kirk or some others at CDPHE may know.
Number of inpatient hospitalization events that occurred in Colorado Front Range Communities between 2010 and 2015 in the May to October Months. Also note that October of 2015 is when ICD9 was switched to ICD10, so I do not have that last month.
strata <- c('All', 'Sex', 'Age')
# estimate hospitalizations
hosp_counts <- co_hosp_list %>%
map_dfr(.,function(df){
map_dfr(strata, function(x){
if(x == 'All'){
counts <- df %>%
filter(outcome == 1) %>%
group_by(out_name) %>%
summarise(n_events = n()) %>%
mutate(strata = x) %>%
dplyr::select(out_name, strata, n_events)
} else if (x == 'Sex'){
counts <- df %>%
filter(outcome == 1) %>%
group_by(out_name, sex) %>%
summarise(n_events = n()) %>%
rename(strata = sex) %>%
dplyr::select(out_name, strata, n_events)
} else {
counts <- df %>%
filter(outcome == 1) %>%
group_by(out_name, age_cat) %>%
summarise(n_events = n()) %>%
rename(strata = age_cat) %>%
dplyr::select(out_name, strata, n_events)
}
}) # end strata map
}) %>% # end outcome list map
spread(strata, n_events) %>%
dplyr::select(out_name, All, F, M, age_under_15, age_15_to_65, age_over_65) %>%
mutate_at(vars(F:age_over_65), funs(round((./All)*100,1)))
# print death counts
knitr::kable(hosp_counts, caption = 'Number of Inpatient Events')
| out_name | All | F | M | age_under_15 | age_15_to_65 | age_over_65 |
|---|---|---|---|---|---|---|
| Acute Bronchitis | 1884 | 41.9 | 58.1 | 65.3 | 20.1 | 14.5 |
| All CVD | 68356 | 47.8 | 52.2 | 0.3 | 40.2 | 59.5 |
| All Respiratory | 46585 | 50.8 | 49.2 | 7.9 | 44.9 | 47.2 |
| Arrhythmia | 9958 | 48.8 | 51.2 | 0.4 | 31.9 | 67.6 |
| Asthma | 6816 | 53.0 | 47.0 | 11.5 | 65.8 | 22.6 |
| Cerebrovascular | 10484 | 34.4 | 65.6 | 0.1 | 48.3 | 51.6 |
| COPD | 6656 | 53.6 | 46.4 | 0.2 | 37.3 | 62.5 |
| Heart Failure | 10090 | 33.7 | 66.3 | 0.1 | 48.2 | 51.7 |
| Ischemic Heart Disease | 10930 | 50.2 | 49.8 | 0.4 | 27.5 | 72.1 |
| Myocardial Infarction | 13660 | 53.6 | 46.4 | 0.3 | 33.7 | 65.9 |
| Pneumonia | 14694 | 51.2 | 48.8 | 5.4 | 40.7 | 53.9 |
Quick assessment of how well my smoke PM2.5 definition at the grid level correlates at the county level for front range counties. I’ve used the cardiovascular hospitalizations (as they are most common) to plot the assigned county smoke PM2.5 vs grid smoke PM2.5 It looks like the values are similar in some respect, and sometimes not. I would assume the grid level is more accurate since my definition requires the grid be completely impacted by smoke based on the HMS plume.
ggplot(data = filter(co_hosp_list[[6]], !is.na(gpm_smk)),
aes(x=cpm_smk, y =gpm_smk)) +
geom_point() +
ylab('Grid Smoke PM2.5') +
xlab('County Smoke PM2.5') +
ggtitle('Comparison of County vs Grid Smoke PM2.5') +
theme_minimal()
Check how many events have a county PM assigned but not a grid PM assigned.
missing <- co_hosp_list[[6]] %>%
filter(outcome == 1) %>%
mutate(county_miss = ifelse(is.na(pm_krig), 1, 0),
grid_miss = ifelse(is.na(pm25_grid), 1, 0)) %>%
group_by(county_miss, grid_miss) %>%
summarize(n = n()) %>%
mutate(prop = round(n/(69064+8716), 2))
knitr::kable(missing, caption = 'CVD Hospitalization Proportion Missing Grid PM2.5')
| county_miss | grid_miss | n | prop |
|---|---|---|---|
| 0 | 0 | 68356 | 0.88 |
Roughly 10% of subjects with a CVD hospitalization are missing grid PM2.5 which I’m guessing is because Kirk was not able to Geocode for what ever reason.
First things first is to look at the same day association between an increase in smoke PM2.5 and the risk for an inpatient hospitalization.
# extract names
outcome <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
'Heart Failure', 'Ischemic Heart Disease',
'Myocardial Infarction')
# outcome order
out_order <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
'Heart Failure', 'Ischemic Heart Disease',
'Myocardial Infarction')
# same day results
same_day <- co_hosp_list %>%
map_dfr(., function(df){
out_name <- unique(df$out_name)
result <- broom::tidy(clogit(outcome ~ gpm_smk10unit + o3_8hr_max_ppb +
temp_f_grid + strata(id),
data = df)) %>%
filter(term == 'gpm_smk10unit') %>%
dplyr::select(term, estimate, conf.low, conf.high) %>%
mutate_at(vars(estimate:conf.high), exp)
}) %>%
bind_cols(data.frame(outcome), .) %>%
mutate(outcome = forcats::fct_relevel(outcome, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular")))
Same-day plot.
# plot
plot <- ggplot(data=same_day, aes(x=outcome, y = estimate, colour = group)) +
geom_point() +
geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ", mu, "g/m"^3))) +
xlab("Outcome") +
theme_bw() +
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))
print(plot)
There is no association for most outcomes. Notable observation would be the inverse association between increasing smoke and a lower likelihood of a respiratory hospitalization.
Here is a table for the point estimated and 95%CIs from the plot above. The gpm_smk10unit stands for grid PM2.5 smoke estimate, 10 unit increase.
knitr::kable(dplyr::select(same_day, outcome, estimate:conf.high) %>%
mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
caption = 'Same Day Association with Smoke and Cardiopulmonary Hospitalizations')
| outcome | estimate | conf.low | conf.high |
|---|---|---|---|
| All Respiratory | 0.950 | 0.901 | 1.003 |
| Asthma | 0.989 | 0.854 | 1.145 |
| COPD | 0.912 | 0.791 | 1.050 |
| Acute Bronchitis | 1.141 | 0.872 | 1.495 |
| Pneumonia | 1.028 | 0.935 | 1.130 |
| All CVD | 0.988 | 0.948 | 1.029 |
| Arrhythmia | 1.036 | 0.935 | 1.149 |
| Cerebrovascular | 1.048 | 0.949 | 1.158 |
| Heart Failure | 1.055 | 0.954 | 1.167 |
| Ischemic Heart Disease | 0.972 | 0.877 | 1.079 |
| Myocardial Infarction | 0.980 | 0.895 | 1.074 |
distribute_that_lag <- function(lag_mod, strata, exp_b) {
# output pm basis estimates
parms <- broom::tidy(lag_mod) %>%
filter(stringr::str_detect(term, strata)) %>%
dplyr::select(estimate) %>%
as_vector()
# output estimate names for cov matrix
names <- stringr::str_subset(names(lag_mod$coefficients), strata)
# estimate associations
est <- exp_b %*% parms
# estimate standard error for each interval
# time variable
time <- ((rep(1:length(est))-1))
# covariance matrix for knots
cov_mat <- as.matrix(vcov(lag_mod))[names, names]
# estimate variance of spline
var <- exp_b %*% cov_mat %*% t(exp_b)
# estimate lag ----
# estimate standard error for each lag day for smoke
l_se <- sqrt(diag(var))
# calculate lower and upper bound for smoke
l_est_l95 <- est + (l_se*qnorm(1-0.975))
l_est_u95 <- est + (l_se*qnorm(0.975))
l_type <- "lag"
# lag dataframe
l_df <- data.frame(strata, l_type, time,
exp(est), exp(l_est_l95), exp(l_est_u95),
row.names = NULL)
# assign column names
colnames(l_df) <- c("strata", "type", "time",
"odds_ratio", "lower_95", "upper_95")
# cumulative estimates
c_est <- sapply(seq_along(est), function(x){
sum(est[1:x])
})
# stderr cumulative effect smk
c_se <- sapply(seq_along(c_est), function(y){
sqrt(sum(var[1:y,1:y]))
})
# estimate 95% CI
c_l95 <- c_est+(c_se*qnorm(1-0.975))
c_u95 <- c_est+(c_se*qnorm(0.975))
# type
c_type <- "cumulative"
# return dataframe
c_df <- data.frame(strata, c_type, time, exp(c_est),
exp(c_l95), exp(c_u95), row.names = NULL)
# assign column names
colnames(c_df) <- c("strata", "type", "time",
"odds_ratio", "lower_95", "upper_95")
# bind lagged and cumulative
lag_est <- rbind(l_df, c_df) %>%
mutate(strata = as.character(strata),
type = as.character(type))
# return lagged estimate
return(lag_est)
} # end lag estimate function
Contstrained distributed lag results.
# estimating lagged effects
out_rep <- rep(outcome, each = 12)
dl_results <- lapply(co_hosp_list, function(x){
# output dataframe from list
data <- x %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id),
data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b = exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .)
# get the cumulative effect over 6 days
cumulative_results <- filter(dl_results, type == 'cumulative' & time == 5) %>%
dplyr::select(-strata, -time) %>%
mutate(outcome = forcats::fct_relevel(outcome, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular")))
Cumulative effect of 0 to 5 days of smoke exposure for a 10 ug/m^3.
# plot
plot <- ggplot(data = cumulative_results,
aes(x=outcome, y = odds_ratio, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Outcome") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
size = 10))
print(plot)
There may be an association with hospitalizations for asthma and certain cardiovascular events (cerebrovascular, heart failure, IHD) for every 10 ug/m^3 of smoke PM2.5 over the course of 6 days of smoke exposure.
Table of cumulative effects results for a 10 ug/m^3 increase in smoke PM2.5
knitr::kable(
dplyr::select(cumulative_results, outcome, type:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Cumulative effect of smoke on Cardiopulmonary Hospitalizations')
| outcome | type | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| All Respiratory | cumulative | 1.029 | 0.949 | 1.116 |
| Asthma | cumulative | 1.284 | 1.040 | 1.584 |
| COPD | cumulative | 1.053 | 0.849 | 1.306 |
| Acute Bronchitis | cumulative | 1.422 | 0.927 | 2.181 |
| Pneumonia | cumulative | 1.035 | 0.894 | 1.198 |
| All CVD | cumulative | 1.039 | 0.977 | 1.106 |
| Arrhythmia | cumulative | 1.085 | 0.922 | 1.278 |
| Cerebrovascular | cumulative | 1.183 | 1.013 | 1.381 |
| Heart Failure | cumulative | 1.195 | 1.021 | 1.399 |
| Ischemic Heart Disease | cumulative | 1.052 | 0.901 | 1.228 |
| Myocardial Infarction | cumulative | 0.981 | 0.854 | 1.127 |
Creating a plot showing the individual lagged day effects on cardiopulmonary hospitalizations. This is a constrained distributed lag model, meaning all lagged days are accounted for in the model.
constrained_lag_results <- dl_results %>%
mutate(outcome = forcats::fct_relevel(out_rep, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular"))) %>%
filter(type == 'lag')
# constrained distributed lag
dl_hosp_plot <- ggplot(data=constrained_lag_results,
aes(x=time, y=odds_ratio, group = group, color = group, fill = group)) +
geom_line(size = 1) +
geom_ribbon(aes(ymin = lower_95, ymax = upper_95), color = 'transparent',
alpha = 0.5) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
scale_fill_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Lagged Days") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank())
print(dl_hosp_plot)
For all respiratory and asthma, there appears to be a inverse association (or trending towards an inverse association) on the same day of exposure, with a steady increase in the risk on day 3. I think the acute bronchitis and pneumonia signals are noisy and the day 3 association is not likely real (suggesting that maybe my model decision is not great). It could be that I need to further reduce the lagged days evaluated to maybe 0 to 2 since the smoke events are pretty brief.
knitr::kable(
dplyr::select(constrained_lag_results, outcome, time:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Distributed Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations')
| outcome | time | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| All Respiratory | 0 | 0.918 | 0.875 | 0.962 |
| All Respiratory | 1 | 0.985 | 0.964 | 1.007 |
| All Respiratory | 2 | 1.038 | 1.009 | 1.069 |
| All Respiratory | 3 | 1.057 | 1.026 | 1.088 |
| All Respiratory | 4 | 1.037 | 1.016 | 1.059 |
| All Respiratory | 5 | 1.000 | 0.955 | 1.047 |
| Asthma | 0 | 0.913 | 0.802 | 1.040 |
| Asthma | 1 | 1.016 | 0.961 | 1.075 |
| Asthma | 2 | 1.099 | 1.018 | 1.187 |
| Asthma | 3 | 1.125 | 1.042 | 1.215 |
| Asthma | 4 | 1.090 | 1.032 | 1.150 |
| Asthma | 5 | 1.026 | 0.906 | 1.162 |
| COPD | 0 | 0.891 | 0.786 | 1.010 |
| COPD | 1 | 0.983 | 0.928 | 1.041 |
| COPD | 2 | 1.057 | 0.979 | 1.142 |
| COPD | 3 | 1.082 | 1.002 | 1.168 |
| COPD | 4 | 1.053 | 0.996 | 1.113 |
| COPD | 5 | 0.999 | 0.884 | 1.130 |
| Acute Bronchitis | 0 | 1.000 | 0.781 | 1.281 |
| Acute Bronchitis | 1 | 1.009 | 0.908 | 1.122 |
| Acute Bronchitis | 2 | 1.026 | 0.880 | 1.195 |
| Acute Bronchitis | 3 | 1.058 | 0.906 | 1.237 |
| Acute Bronchitis | 4 | 1.109 | 0.992 | 1.239 |
| Acute Bronchitis | 5 | 1.170 | 0.917 | 1.493 |
| Pneumonia | 0 | 0.981 | 0.902 | 1.067 |
| Pneumonia | 1 | 1.016 | 0.977 | 1.057 |
| Pneumonia | 2 | 1.039 | 0.986 | 1.095 |
| Pneumonia | 3 | 1.035 | 0.982 | 1.090 |
| Pneumonia | 4 | 1.004 | 0.967 | 1.043 |
| Pneumonia | 5 | 0.961 | 0.884 | 1.045 |
| All CVD | 0 | 0.984 | 0.949 | 1.020 |
| All CVD | 1 | 1.001 | 0.985 | 1.018 |
| All CVD | 2 | 1.014 | 0.992 | 1.037 |
| All CVD | 3 | 1.019 | 0.996 | 1.042 |
| All CVD | 4 | 1.015 | 0.999 | 1.031 |
| All CVD | 5 | 1.007 | 0.971 | 1.043 |
| Arrhythmia | 0 | 1.058 | 0.965 | 1.160 |
| Arrhythmia | 1 | 0.988 | 0.946 | 1.032 |
| Arrhythmia | 2 | 0.948 | 0.893 | 1.006 |
| Arrhythmia | 3 | 0.958 | 0.904 | 1.016 |
| Arrhythmia | 4 | 1.022 | 0.979 | 1.066 |
| Arrhythmia | 5 | 1.119 | 1.019 | 1.227 |
| Cerebrovascular | 0 | 1.035 | 0.948 | 1.131 |
| Cerebrovascular | 1 | 1.026 | 0.985 | 1.068 |
| Cerebrovascular | 2 | 1.020 | 0.964 | 1.078 |
| Cerebrovascular | 3 | 1.021 | 0.965 | 1.079 |
| Cerebrovascular | 4 | 1.029 | 0.987 | 1.072 |
| Cerebrovascular | 5 | 1.040 | 0.951 | 1.138 |
| Heart Failure | 0 | 1.038 | 0.949 | 1.136 |
| Heart Failure | 1 | 1.026 | 0.985 | 1.070 |
| Heart Failure | 2 | 1.019 | 0.963 | 1.079 |
| Heart Failure | 3 | 1.021 | 0.964 | 1.080 |
| Heart Failure | 4 | 1.031 | 0.989 | 1.075 |
| Heart Failure | 5 | 1.046 | 0.955 | 1.146 |
| Ischemic Heart Disease | 0 | 0.921 | 0.841 | 1.010 |
| Ischemic Heart Disease | 1 | 1.023 | 0.983 | 1.066 |
| Ischemic Heart Disease | 2 | 1.096 | 1.038 | 1.158 |
| Ischemic Heart Disease | 3 | 1.094 | 1.036 | 1.156 |
| Ischemic Heart Disease | 4 | 1.018 | 0.978 | 1.060 |
| Ischemic Heart Disease | 5 | 0.913 | 0.834 | 1.000 |
| Myocardial Infarction | 0 | 0.950 | 0.876 | 1.031 |
| Myocardial Infarction | 1 | 0.999 | 0.963 | 1.037 |
| Myocardial Infarction | 2 | 1.034 | 0.983 | 1.087 |
| Myocardial Infarction | 3 | 1.036 | 0.985 | 1.089 |
| Myocardial Infarction | 4 | 1.005 | 0.970 | 1.042 |
| Myocardial Infarction | 5 | 0.959 | 0.885 | 1.040 |
Running an unconstrained lag model where I model the cooresponding lagged day of smoke, adjusting for the same lag day of temperature and ozone. This model is for comparison with our 2012 Washington paper and to see how sensitive our distributed lag models are.
# repeat outcomes 4 times
out_rep <- rep(outcome, each = 6)
# day to repeat
day <- rep(0:5, times = 11)
# lag day vector
lag_days <- c('', '_lag1', '_lag2', '_lag3', '_lag4', '_lag5')
# map across lagged days
hosp_uc_lag_results <- map_dfr(co_hosp_list, function(df){
data <- df
results <- map_dfr(lag_days, function(x){
# define function
f <- as.formula(paste('outcome~',
paste(paste0(c('gpm_smk10unit', 'o3_8hr_max_ppb', 'temp_f_grid'),
x), collapse = '+'),
'+strata(id)'))
r <- broom::tidy(clogit(f, data = data)) %>%
filter(term == paste0('gpm_smk10unit',x)) %>%
dplyr::select(term, estimate, conf.low, conf.high) %>%
mutate_at(vars(estimate:conf.high), exp)
})
}) %>%
cbind(out_rep, day, .) %>%
mutate(outcome = forcats::fct_relevel(out_rep, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular")))
Plot of unconstrained lag results for hospitalizations.
uc_plot <- ggplot(data=hosp_uc_lag_results, aes(x=day, y=estimate,
group = group, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Lagged Days") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank())
print(uc_plot)
In the context of the distributed lag models, this may help confirm some of the associations. I think the respiratory and asthma trends are supported by these plots. Same with ischemic heart disease.
Unconstrained lag associations table.
knitr::kable(dplyr::select(hosp_uc_lag_results, outcome, day, estimate:conf.high) %>%
mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
caption = 'Unconstrained Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations')
| outcome | day | estimate | conf.low | conf.high |
|---|---|---|---|---|
| All Respiratory | 0 | 0.950 | 0.901 | 1.003 |
| All Respiratory | 1 | 0.993 | 0.943 | 1.046 |
| All Respiratory | 2 | 1.040 | 0.988 | 1.094 |
| All Respiratory | 3 | 1.106 | 1.053 | 1.162 |
| All Respiratory | 4 | 1.060 | 1.007 | 1.116 |
| All Respiratory | 5 | 1.036 | 0.984 | 1.090 |
| Asthma | 0 | 0.989 | 0.854 | 1.145 |
| Asthma | 1 | 1.101 | 0.968 | 1.251 |
| Asthma | 2 | 1.197 | 1.049 | 1.366 |
| Asthma | 3 | 1.259 | 1.109 | 1.430 |
| Asthma | 4 | 1.146 | 1.000 | 1.313 |
| Asthma | 5 | 1.185 | 1.035 | 1.356 |
| COPD | 0 | 0.912 | 0.791 | 1.050 |
| COPD | 1 | 1.069 | 0.935 | 1.222 |
| COPD | 2 | 1.005 | 0.880 | 1.148 |
| COPD | 3 | 1.148 | 1.004 | 1.312 |
| COPD | 4 | 1.137 | 0.997 | 1.297 |
| COPD | 5 | 1.031 | 0.898 | 1.183 |
| Acute Bronchitis | 0 | 1.141 | 0.872 | 1.495 |
| Acute Bronchitis | 1 | 0.979 | 0.741 | 1.293 |
| Acute Bronchitis | 2 | 1.099 | 0.837 | 1.442 |
| Acute Bronchitis | 3 | 1.374 | 1.049 | 1.801 |
| Acute Bronchitis | 4 | 1.328 | 1.004 | 1.757 |
| Acute Bronchitis | 5 | 1.201 | 0.909 | 1.588 |
| Pneumonia | 0 | 1.028 | 0.935 | 1.130 |
| Pneumonia | 1 | 1.018 | 0.928 | 1.117 |
| Pneumonia | 2 | 1.046 | 0.952 | 1.149 |
| Pneumonia | 3 | 1.084 | 0.992 | 1.185 |
| Pneumonia | 4 | 1.020 | 0.931 | 1.118 |
| Pneumonia | 5 | 0.971 | 0.885 | 1.066 |
| All CVD | 0 | 0.988 | 0.948 | 1.029 |
| All CVD | 1 | 1.050 | 1.010 | 1.091 |
| All CVD | 2 | 1.015 | 0.975 | 1.056 |
| All CVD | 3 | 1.029 | 0.991 | 1.069 |
| All CVD | 4 | 1.038 | 0.997 | 1.080 |
| All CVD | 5 | 1.019 | 0.980 | 1.060 |
| Arrhythmia | 0 | 1.036 | 0.935 | 1.149 |
| Arrhythmia | 1 | 1.020 | 0.920 | 1.132 |
| Arrhythmia | 2 | 0.954 | 0.859 | 1.059 |
| Arrhythmia | 3 | 0.984 | 0.889 | 1.089 |
| Arrhythmia | 4 | 1.094 | 0.983 | 1.219 |
| Arrhythmia | 5 | 1.078 | 0.972 | 1.197 |
| Cerebrovascular | 0 | 1.048 | 0.949 | 1.158 |
| Cerebrovascular | 1 | 1.122 | 1.020 | 1.235 |
| Cerebrovascular | 2 | 1.050 | 0.953 | 1.157 |
| Cerebrovascular | 3 | 1.078 | 0.978 | 1.189 |
| Cerebrovascular | 4 | 1.073 | 0.969 | 1.189 |
| Cerebrovascular | 5 | 1.077 | 0.975 | 1.189 |
| Heart Failure | 0 | 1.055 | 0.954 | 1.167 |
| Heart Failure | 1 | 1.122 | 1.018 | 1.237 |
| Heart Failure | 2 | 1.050 | 0.952 | 1.159 |
| Heart Failure | 3 | 1.090 | 0.987 | 1.203 |
| Heart Failure | 4 | 1.078 | 0.972 | 1.196 |
| Heart Failure | 5 | 1.082 | 0.978 | 1.197 |
| Ischemic Heart Disease | 0 | 0.972 | 0.877 | 1.079 |
| Ischemic Heart Disease | 1 | 1.104 | 1.007 | 1.210 |
| Ischemic Heart Disease | 2 | 1.101 | 1.000 | 1.213 |
| Ischemic Heart Disease | 3 | 1.124 | 1.025 | 1.232 |
| Ischemic Heart Disease | 4 | 1.056 | 0.959 | 1.163 |
| Ischemic Heart Disease | 5 | 0.961 | 0.868 | 1.064 |
| Myocardial Infarction | 0 | 0.980 | 0.895 | 1.074 |
| Myocardial Infarction | 1 | 1.002 | 0.918 | 1.094 |
| Myocardial Infarction | 2 | 1.018 | 0.932 | 1.112 |
| Myocardial Infarction | 3 | 1.069 | 0.984 | 1.160 |
| Myocardial Infarction | 4 | 0.965 | 0.881 | 1.057 |
| Myocardial Infarction | 5 | 0.993 | 0.908 | 1.085 |
Using the distributed lag model for each sex strata. Also modifying code to pull out n cases analyzed.
# estimating lagged effects
out_rep <- rep(outcome, each = 24)
sex_strata <- c('F', 'M')
sex_results <- map_dfr(co_hosp_list, function(df){
results <- map_dfr(sex_strata, function(x){
data <- df %>%
filter(sex == x) %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id),
data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>%
mutate(sex = x)
return(lag_est)
})
}) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .)
Sex-specific plots of the cumulative effect.
# subset to cumulative results
cumulative_sex_strata <- sex_results %>%
filter(type == 'cumulative' & time == 5) %>%
mutate(sex_long = if_else(sex == 'F', 'Female', 'Male'),
outcome = forcats::fct_relevel(out_rep, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular")))
# plot
sex_plot <- ggplot(data=cumulative_sex_strata, aes(x=sex_long, y=odds_ratio,
group = group, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Sex") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank())
print(sex_plot)
Sex-specific estimates table.
knitr::kable(dplyr::select(cumulative_sex_strata, outcome, sex_long, odds_ratio:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Sex-specific cumulative effect of smoke on hosptializations.')
| outcome | sex_long | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| All Respiratory | Female | 1.005 | 0.897 | 1.125 |
| All Respiratory | Male | 1.055 | 0.939 | 1.185 |
| Asthma | Female | 1.268 | 0.945 | 1.701 |
| Asthma | Male | 1.292 | 0.954 | 1.749 |
| COPD | Female | 1.008 | 0.743 | 1.369 |
| COPD | Male | 1.095 | 0.807 | 1.487 |
| Acute Bronchitis | Female | 1.190 | 0.629 | 2.250 |
| Acute Bronchitis | Male | 1.683 | 0.937 | 3.022 |
| Pneumonia | Female | 1.005 | 0.820 | 1.232 |
| Pneumonia | Male | 1.066 | 0.863 | 1.316 |
| All CVD | Female | 1.041 | 0.951 | 1.139 |
| All CVD | Male | 1.037 | 0.952 | 1.130 |
| Arrhythmia | Female | 1.229 | 0.980 | 1.541 |
| Arrhythmia | Male | 0.950 | 0.751 | 1.203 |
| Cerebrovascular | Female | 1.188 | 0.906 | 1.557 |
| Cerebrovascular | Male | 1.178 | 0.975 | 1.423 |
| Heart Failure | Female | 1.215 | 0.919 | 1.605 |
| Heart Failure | Male | 1.185 | 0.979 | 1.434 |
| Ischemic Heart Disease | Female | 1.071 | 0.859 | 1.334 |
| Ischemic Heart Disease | Male | 1.035 | 0.833 | 1.287 |
| Myocardial Infarction | Female | 1.060 | 0.881 | 1.275 |
| Myocardial Infarction | Male | 0.883 | 0.715 | 1.090 |
Age category stratified results.
# estimating lagged effects
out_rep <- rep(outcome, each = 36)
age_strata <- c('age_under_15', 'age_15_to_65', 'age_over_65')
age_results <- map_dfr(co_hosp_list, function(df){
results <- map_dfr(age_strata, function(x){
data <- df %>%
filter(age_cat == x) %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- tryCatch(clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id),
data = data))
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>%
mutate(age_cat = x)
return(lag_est)
})
}) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .)
Age-specific plots of the cumulative effect.
# subset to cumulative results
cumulative_age_strata <- age_results %>%
filter(type == 'cumulative' & time == 5) %>%
mutate(age_long = forcats::fct_relevel(
case_when(age_cat == 'age_under_15' ~ 'Age < 15',
age_cat == 'age_15_to_65' ~ 'Age 15 to 64',
age_cat == 'age_over_65' ~ 'Age > 64'),
c('Age < 15', 'Age 15 to 64', 'Age > 64')),
outcome = forcats::fct_relevel(out_rep, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular"))) %>%
filter(upper_95 < 3.5)
# plot
age_plot <- ggplot(data=cumulative_age_strata, aes(x=age_long, y=odds_ratio,
group = group, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Sex") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))
print(age_plot)
Age-specific estimates table.
knitr::kable(dplyr::select(cumulative_age_strata, outcome, age_long, odds_ratio:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Age-specific cumulative effect of smoke on hosptializations.')
| outcome | age_long | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| All Respiratory | Age < 15 | 1.271 | 0.931 | 1.734 |
| All Respiratory | Age 15 to 64 | 0.978 | 0.865 | 1.106 |
| All Respiratory | Age > 64 | 1.044 | 0.931 | 1.171 |
| Asthma | Age < 15 | 0.526 | 0.240 | 1.156 |
| Asthma | Age 15 to 64 | 1.338 | 1.035 | 1.731 |
| Asthma | Age > 64 | 1.510 | 0.993 | 2.298 |
| COPD | Age 15 to 64 | 1.231 | 0.878 | 1.726 |
| COPD | Age > 64 | 0.945 | 0.712 | 1.252 |
| Acute Bronchitis | Age 15 to 64 | 0.334 | 0.102 | 1.090 |
| Acute Bronchitis | Age > 64 | 0.626 | 0.161 | 2.436 |
| Pneumonia | Age < 15 | 0.803 | 0.368 | 1.752 |
| Pneumonia | Age 15 to 64 | 0.883 | 0.691 | 1.128 |
| Pneumonia | Age > 64 | 1.155 | 0.957 | 1.394 |
| All CVD | Age < 15 | 0.944 | 0.359 | 2.482 |
| All CVD | Age 15 to 64 | 1.037 | 0.939 | 1.144 |
| All CVD | Age > 64 | 1.042 | 0.962 | 1.128 |
| Arrhythmia | Age 15 to 64 | 0.858 | 0.632 | 1.165 |
| Arrhythmia | Age > 64 | 1.196 | 0.986 | 1.453 |
| Cerebrovascular | Age 15 to 64 | 1.192 | 0.948 | 1.500 |
| Cerebrovascular | Age > 64 | 1.173 | 0.950 | 1.447 |
| Heart Failure | Age 15 to 64 | 1.204 | 0.953 | 1.522 |
| Heart Failure | Age > 64 | 1.185 | 0.957 | 1.467 |
| Ischemic Heart Disease | Age 15 to 64 | 0.938 | 0.689 | 1.278 |
| Ischemic Heart Disease | Age > 64 | 1.093 | 0.913 | 1.309 |
| Myocardial Infarction | Age 15 to 64 | 1.062 | 0.837 | 1.349 |
| Myocardial Infarction | Age > 64 | 0.943 | 0.795 | 1.119 |
Since we have mortality data, I’m also going to evaluate the association between grid-level smoke PM2.5 and cardiopulmonary mortality. I’ve created time-stratified case-crossover data frames of cardiopulmonary mortality events with referent period within the same month. I’ve also limit mortality case-crossover events to Front Range counties from May to October from 2010 to 2015.
# load casecrossover list -----
load("../../data/health/co_mortality_cc_list.RData")
# load mortality outcome list
load("../../data/health/icd10_outcome.RData")
# extract a vector of outcome names from the icd10 outcomes list
outcomes <- names(icd10_outcomes)
# reduce case-crossover list to only summer months and join pm
co_mortality_list <- casecross_list %>%
# desired format to make sure it's right
map(~ mutate(., outcome = as.numeric(as.character(outcome)),
date = as.Date(as.character(date_of_death)),
month = as.factor(lubridate::month(date))) %>%
# filter out 2016; I don't have pm data yet
filter(date <= "2015-12-30") %>%
filter(fips %in% fr_fips)) %>%
# add outcome name to each dataframe
map2(.x = ., .y = outcomes, ~mutate(.x, out_name = .y))
Time series of events for respiratory and cvd only. I also aggregated across front range counties for sufficient numbers. Looking for any potential time trends that may need to be accounted for.
# estimate deaths
ts_death <- co_mortality_list %>%
map_dfr(. , function(df){
counts <- df %>%
filter(outcome == 1) %>%
group_by(out_name, date) %>%
summarise(n = n())
}) %>%
filter(out_name %in% c('resp', 'cvd'))
Mortality time-series plot.
ts_death_plot <- ggplot(data=ts_death, aes(x=date, y=n)) +
geom_point() +
facet_wrap(~out_name) +
theme_minimal()
ts_death_plot
There is a general increase in cardiovascular deaths over time, which I’m guessing corresponds with the population increase of Colorado. Rates probably would show a more stable rate. Interesting observation in the respiratory related morbidity is the spike at the end of 2014, which I think is because of the bad flu season. I remember Kirk saying this.
I am going to limit mortality to May to October months in 2010 to 2015 and join with PM2.5 estimates.
# extract names
cause_death <- c('Respiratory', 'Asthma', 'COPD', 'CVD', 'Heart Failure',
'Cardiac Arrest', 'Ischemic Heart Disease',
'Myocardial Infarction', 'Cerebrovascular')
# reduce case-crossover list to only summer months and join GRID ID
co_death_list <- co_mortality_list %>%
# filter out 2016; I don't have pm data yet
map(~ filter(., date <= "2015-12-30") %>%
filter(month %in% 5:10) %>%
filter(fips %in% fr_fips) %>%
mutate(WRFGRID_ID = as.character(wrfgrid_id),
# age category
age_cat = case_when(age < 15 ~ 'age_under_15',
age >= 15 & age < 65 ~ 'age_15_to_65',
age >= 65 ~ 'age_over_65')) %>%
dplyr::select(-month, -wrfgrid_id) %>%
# join with grid key
left_join(grid_key, by = 'WRFGRID_ID') %>%
left_join(co_pm, by = c("fips", "date")) %>%
# left join grid pm
left_join(grid_pm, by = c("GRID_ID", "date")) %>%
left_join(ozone, by = c("GRID_ID", "date"))) %>%
# add outcome name to each dataframe
map2(.x = ., .y = cause_death, ~mutate(.x, out_name = .y))
Number of cardiopulmonary deaths in Front Range counties during this time period.
strata <- c('All', 'Sex', 'Age')
# estimate deaths
death_counts <- co_death_list %>%
map_dfr(.,function(df){
map_dfr(strata, function(x){
if(x == 'All'){
counts <- df %>%
filter(outcome == 1) %>%
group_by(out_name) %>%
summarise(n_events = n()) %>%
mutate(strata = x) %>%
dplyr::select(out_name, strata, n_events)
} else if(x == 'Sex'){
counts <- df %>%
filter(outcome == 1) %>%
group_by(out_name, sex) %>%
summarise(n_events = n()) %>%
rename(strata = sex) %>%
dplyr::select(out_name, strata, n_events)
} else {
counts <- df %>%
filter(outcome == 1) %>%
group_by(out_name, age_cat) %>%
summarise(n_events = n()) %>%
rename(strata = age_cat) %>%
dplyr::select(out_name, strata, n_events)
}
}) # end strata map
}) %>% # end outcome list map
spread(strata, n_events) %>%
dplyr::select(out_name, All, F, M, age_under_15, age_15_to_65, age_over_65) %>%
mutate_at(vars(F:age_over_65), funs(round((./All)*100,1)))
# print death counts
knitr::kable(death_counts, caption='Front Range Cardiopulmonary Deaths and Strata Proportion')
| out_name | All | F | M | age_under_15 | age_15_to_65 | age_over_65 |
|---|---|---|---|---|---|---|
| Asthma | 101 | 68.3 | 31.7 | 4.0 | 44.6 | 51.5 |
| Cardiac Arrest | 157 | 58.0 | 42.0 | NA | 18.5 | 81.5 |
| Cerebrovascular | 3443 | 60.1 | 39.9 | 0.3 | 14.6 | 85.1 |
| COPD | 4056 | 51.2 | 48.8 | NA | 12.4 | 87.6 |
| CVD | 18122 | 49.9 | 50.1 | 0.2 | 20.0 | 79.8 |
| Heart Failure | 1395 | 59.5 | 40.5 | 0.1 | 6.3 | 93.6 |
| Ischemic Heart Disease | 7520 | 39.9 | 60.1 | 0.0 | 23.2 | 76.8 |
| Myocardial Infarction | 1948 | 40.9 | 59.1 | NA | 25.4 | 74.6 |
| Respiratory | 7025 | 50.5 | 49.5 | 0.4 | 14.9 | 84.7 |
Assessing how many CVD deaths are missing a grid assignment but were assigned a county PM2.5. Not that many missing (~2%).
missing <- co_death_list[[4]] %>%
filter(outcome == 1) %>%
mutate(county_miss = ifelse(is.na(pm_krig), 1, 0),
grid_miss = ifelse(is.na(pm25_grid), 1, 0)) %>%
group_by(county_miss, grid_miss) %>%
summarize(n = n()) %>%
mutate(prop = round(n/(17720+402),2))
knitr::kable(missing, caption = 'CVD mortality missing grid PM2.5')
| county_miss | grid_miss | n | prop |
|---|---|---|---|
| 0 | 0 | 17493 | 0.97 |
| 0 | 1 | 629 | 0.03 |
Same analysis as hospitalizations where likelihood of a cardiopulmonary death is regressed a 10 ug/m^3 increase in smoke PM2.5. Model accounts for within subject variability and adjusts for temperature.
# same day results
same_day_death <- co_death_list %>%
map_dfr(., function(df){
out_name <- unique(df$out_name)
result <- broom::tidy(clogit(outcome ~ gpm_smk10unit + o3_8hr_max_ppb +
temp_f_grid + strata(id),
data = df)) %>%
filter(term == 'gpm_smk10unit') %>%
dplyr::select(term, estimate, conf.low, conf.high) %>%
mutate_at(vars(estimate:conf.high), exp) %>%
mutate(outcome_name = out_name)
}) %>%
mutate(outcome_name = forcats::fct_relevel(outcome_name, .$outcome_name),
group = as.factor(ifelse(outcome_name %in% c('Respiratory', 'Asthma', 'COPD'),
'Respiratory', 'Cardiovascular')))
Mortality same day association plot.
death_plot <- ggplot(data=same_day_death, aes(x=outcome_name, y = estimate,
colour = group)) +
geom_point() +
geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Underlying Cause of Death") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))
print(death_plot)
Note there are very few deaths with underlying cause of death as asthma and cardiac arrest and thus a lot of variability. I’m going to plot without these two outcomes so it’s easier to read.
Underlying cause of death plot I’ll use in the paper.
death_plot2 <- ggplot(data = filter(same_day_death,
!(outcome_name %in% c('Asthma', 'Cardiac Arrest'))),
aes(x=outcome_name, y = estimate, colour = group)) +
geom_point() +
geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Underlying Cause of Death") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))
print(death_plot2)
No significant associations with a 10 ug/m^3 increase in smoke PM2.5 and cardiopulmonary morbidity.
Here is a table of the same-day mortality association.
knitr::kable(dplyr::select(same_day_death, outcome_name, estimate:conf.high) %>%
mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
caption = "Same Day Association with Smoke and Mortality")
| outcome_name | estimate | conf.low | conf.high |
|---|---|---|---|
| Respiratory | 1.072 | 0.943 | 1.219 |
| Asthma | 1.746 | 0.788 | 3.868 |
| COPD | 1.014 | 0.862 | 1.192 |
| CVD | 1.024 | 0.944 | 1.111 |
| Heart Failure | 0.935 | 0.699 | 1.252 |
| Cardiac Arrest | 2.932 | 1.111 | 7.740 |
| Ischemic Heart Disease | 0.968 | 0.854 | 1.099 |
| Myocardial Infarction | 1.168 | 0.900 | 1.516 |
| Cerebrovascular | 1.135 | 0.940 | 1.370 |
Looking at cumulative exposure of 0 to 5 lagged days of smoke exposure on risk for cardiopulmonary death.
cod <- rep(cause_death, each = 12)
dl_death_results <- lapply(co_death_list, function(x){
# output dataframe from list
data <- x %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(cod), .)
Cumulative effect death.
cumulative_death_results <- filter(dl_death_results,
type == 'cumulative' & time == 5) %>%
dplyr::select(-strata, -time) %>%
mutate(outcome = forcats::fct_relevel(cod, cause_death),
group = as.factor(if_else(cod %in% cause_death[1:3],
"Respiratory", "Cardiovascular"))) %>%
filter(!(outcome %in% c('Asthma', 'Cardiac Arrest')))
Plot of cumulative effect.
# plot
cumulative_death_plot <- ggplot(data = cumulative_death_results,
aes(x=outcome, y = odds_ratio, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10 ", mu,"g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Outcome") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
size = 10))
print(cumulative_death_plot)
Removed asthma and cardiac arrest as they were highly variable.
Table of the cumulative effects of smoke on underlying cause of death.
knitr::kable(dplyr::select(cumulative_death_results,outcome, odds_ratio:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = "Cumulative Effect of Smoke on Mortality")
| outcome | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|
| Respiratory | 1.113 | 0.907 | 1.367 |
| COPD | 0.920 | 0.698 | 1.212 |
| CVD | 1.050 | 0.926 | 1.191 |
| Heart Failure | 0.752 | 0.475 | 1.189 |
| Ischemic Heart Disease | 1.023 | 0.839 | 1.247 |
| Myocardial Infarction | 1.055 | 0.699 | 1.593 |
| Cerebrovascular | 1.080 | 0.809 | 1.442 |
Plot of daily estimates from the distributed lag model for mortality.
constrained_lag_results_death <- dl_death_results %>%
mutate(outcome = forcats::fct_relevel(cod, cause_death),
group = as.factor(if_else(cod %in% cause_death[1:3],
"Respiratory", "Cardiovascular"))) %>%
filter(!(outcome %in% c('Asthma', 'Cardiac Arrest'))) %>%
filter(type == 'lag')
# constrained distributed lag
dl_death_plot <- ggplot(data=constrained_lag_results_death,
aes(x=time, y=odds_ratio, group = group, color = group, fill = group)) +
geom_line(size = 1) +
geom_ribbon(aes(ymin = lower_95, ymax = upper_95), color = 'transparent',
alpha = 0.5) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Underlying Cause of Death", values = c("#ff00cc", "darkblue")) +
scale_fill_manual("Underlying Cause of Death", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Lagged Days") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank())
print(dl_death_plot)
If you look at the effect sizes, I do think there may be some interesting associations with all cause respiratory and CVD. It also appears that maybe heart attacks and strokes could be increased on the day of smoke exposure.
But there is too much noise at this point I think to make any conclusions if there is an association or not with smoke and CVD or respiratory deaths in this data.
Daily distributed lagged estimates table.
knitr::kable(dplyr::select(constrained_lag_results_death, outcome, time:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Lagged Associations between Smoke and Underlying Cause of Death')
| outcome | time | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| Respiratory | 0 | 1.028 | 0.916 | 1.153 |
| Respiratory | 1 | 1.044 | 0.991 | 1.100 |
| Respiratory | 2 | 1.049 | 0.975 | 1.130 |
| Respiratory | 3 | 1.034 | 0.960 | 1.114 |
| Respiratory | 4 | 1.000 | 0.947 | 1.056 |
| Respiratory | 5 | 0.956 | 0.850 | 1.076 |
| COPD | 0 | 0.999 | 0.861 | 1.159 |
| COPD | 1 | 0.999 | 0.933 | 1.070 |
| COPD | 2 | 0.997 | 0.901 | 1.103 |
| COPD | 3 | 0.989 | 0.894 | 1.094 |
| COPD | 4 | 0.975 | 0.906 | 1.050 |
| COPD | 5 | 0.959 | 0.820 | 1.121 |
| CVD | 0 | 1.006 | 0.935 | 1.082 |
| CVD | 1 | 1.016 | 0.983 | 1.050 |
| CVD | 2 | 1.021 | 0.977 | 1.067 |
| CVD | 3 | 1.017 | 0.973 | 1.063 |
| CVD | 4 | 1.004 | 0.971 | 1.037 |
| CVD | 5 | 0.986 | 0.918 | 1.060 |
| Heart Failure | 0 | 0.991 | 0.765 | 1.285 |
| Heart Failure | 1 | 1.000 | 0.894 | 1.119 |
| Heart Failure | 2 | 0.996 | 0.847 | 1.171 |
| Heart Failure | 3 | 0.968 | 0.822 | 1.138 |
| Heart Failure | 4 | 0.917 | 0.809 | 1.039 |
| Heart Failure | 5 | 0.858 | 0.649 | 1.135 |
| Ischemic Heart Disease | 0 | 0.946 | 0.845 | 1.060 |
| Ischemic Heart Disease | 1 | 1.012 | 0.961 | 1.065 |
| Ischemic Heart Disease | 2 | 1.058 | 0.987 | 1.134 |
| Ischemic Heart Disease | 3 | 1.058 | 0.986 | 1.134 |
| Ischemic Heart Disease | 4 | 1.011 | 0.959 | 1.065 |
| Ischemic Heart Disease | 5 | 0.944 | 0.845 | 1.056 |
| Myocardial Infarction | 0 | 1.092 | 0.864 | 1.379 |
| Myocardial Infarction | 1 | 1.078 | 0.974 | 1.193 |
| Myocardial Infarction | 2 | 1.054 | 0.915 | 1.214 |
| Myocardial Infarction | 3 | 1.011 | 0.874 | 1.168 |
| Myocardial Infarction | 4 | 0.951 | 0.849 | 1.064 |
| Myocardial Infarction | 5 | 0.885 | 0.697 | 1.125 |
| Cerebrovascular | 0 | 1.082 | 0.914 | 1.281 |
| Cerebrovascular | 1 | 1.024 | 0.948 | 1.105 |
| Cerebrovascular | 2 | 0.983 | 0.887 | 1.090 |
| Cerebrovascular | 3 | 0.973 | 0.878 | 1.079 |
| Cerebrovascular | 4 | 0.992 | 0.922 | 1.067 |
| Cerebrovascular | 5 | 1.027 | 0.873 | 1.208 |
Unconstrained lag for mortality.
# repeat outcomes 4 times
ucod <- rep(cause_death, each = 6)
# day to repeat
day <- rep(0:5, times = 9)
# lag day vector
lag_days <- c('', '_lag1', '_lag2', '_lag3', '_lag4', '_lag5')
# map across lagged days
death_uc_lag_results <- map_dfr(co_death_list, function(df){
data <- df
results <- map_dfr(lag_days, function(x){
# define function
f <- as.formula(paste('outcome~',
paste(paste0(c('gpm_smk10unit', 'o3_8hr_max_ppb', 'temp_f_grid'),
x), collapse = '+'),
'+strata(id)'))
r <- broom::tidy(clogit(f, data = data)) %>%
filter(term == paste0('gpm_smk10unit',x)) %>%
dplyr::select(term, estimate, conf.low, conf.high) %>%
mutate_at(vars(estimate:conf.high), exp)
})
}) %>%
cbind(ucod, day, .) %>%
mutate(outcome = forcats::fct_relevel(ucod, cause_death),
group = as.factor(if_else(outcome %in% cause_death[1:3],
"Respiratory", "Cardiovascular")))
Plot for unconstrained lag.
death_uc_plot <- ggplot(data=death_uc_lag_results, aes(x=day, y=estimate,
group = group, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Lagged Days") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank())
print(death_uc_plot)
Table for unconstrained lag values.
knitr::kable(dplyr::select(death_uc_lag_results, ucod, day, estimate:conf.high) %>%
mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
caption = 'Uncontrained Lag Association between Smoke and Underlying Cause of Death')
| ucod | day | estimate | conf.low | conf.high |
|---|---|---|---|---|
| Respiratory | 0 | 1.072 | 0.943 | 1.219 |
| Respiratory | 1 | 1.107 | 0.980 | 1.249 |
| Respiratory | 2 | 1.087 | 0.958 | 1.233 |
| Respiratory | 3 | 1.022 | 0.893 | 1.169 |
| Respiratory | 4 | 1.106 | 0.971 | 1.259 |
| Respiratory | 5 | 0.950 | 0.830 | 1.086 |
| Asthma | 0 | 1.746 | 0.788 | 3.868 |
| Asthma | 1 | 2.070 | 0.640 | 6.691 |
| Asthma | 2 | 0.455 | 0.094 | 2.193 |
| Asthma | 3 | 3.301 | 1.032 | 10.559 |
| Asthma | 4 | 1.845 | 0.585 | 5.825 |
| Asthma | 5 | 1.484 | 0.516 | 4.271 |
| COPD | 0 | 1.014 | 0.862 | 1.192 |
| COPD | 1 | 0.971 | 0.820 | 1.149 |
| COPD | 2 | 0.959 | 0.801 | 1.148 |
| COPD | 3 | 0.935 | 0.776 | 1.127 |
| COPD | 4 | 1.042 | 0.878 | 1.237 |
| COPD | 5 | 0.897 | 0.748 | 1.075 |
| CVD | 0 | 1.024 | 0.944 | 1.111 |
| CVD | 1 | 1.041 | 0.961 | 1.127 |
| CVD | 2 | 1.052 | 0.972 | 1.138 |
| CVD | 3 | 1.007 | 0.933 | 1.087 |
| CVD | 4 | 1.044 | 0.966 | 1.129 |
| CVD | 5 | 0.987 | 0.910 | 1.070 |
| Heart Failure | 0 | 0.935 | 0.699 | 1.252 |
| Heart Failure | 1 | 1.037 | 0.774 | 1.389 |
| Heart Failure | 2 | 1.005 | 0.767 | 1.318 |
| Heart Failure | 3 | 0.837 | 0.623 | 1.126 |
| Heart Failure | 4 | 0.763 | 0.548 | 1.061 |
| Heart Failure | 5 | 0.854 | 0.630 | 1.158 |
| Cardiac Arrest | 0 | 2.932 | 1.111 | 7.740 |
| Cardiac Arrest | 1 | 1.457 | 0.697 | 3.044 |
| Cardiac Arrest | 2 | 1.110 | 0.415 | 2.970 |
| Cardiac Arrest | 3 | 2.005 | 0.865 | 4.650 |
| Cardiac Arrest | 4 | 2.390 | 1.046 | 5.463 |
| Cardiac Arrest | 5 | 1.653 | 0.805 | 3.393 |
| Ischemic Heart Disease | 0 | 0.968 | 0.854 | 1.099 |
| Ischemic Heart Disease | 1 | 1.051 | 0.928 | 1.190 |
| Ischemic Heart Disease | 2 | 1.088 | 0.959 | 1.234 |
| Ischemic Heart Disease | 3 | 1.051 | 0.932 | 1.187 |
| Ischemic Heart Disease | 4 | 1.014 | 0.891 | 1.154 |
| Ischemic Heart Disease | 5 | 0.978 | 0.864 | 1.108 |
| Myocardial Infarction | 0 | 1.168 | 0.900 | 1.516 |
| Myocardial Infarction | 1 | 1.114 | 0.867 | 1.431 |
| Myocardial Infarction | 2 | 1.095 | 0.866 | 1.385 |
| Myocardial Infarction | 3 | 1.037 | 0.803 | 1.340 |
| Myocardial Infarction | 4 | 0.942 | 0.720 | 1.232 |
| Myocardial Infarction | 5 | 0.876 | 0.667 | 1.149 |
| Cerebrovascular | 0 | 1.135 | 0.940 | 1.370 |
| Cerebrovascular | 1 | 0.988 | 0.818 | 1.195 |
| Cerebrovascular | 2 | 0.980 | 0.816 | 1.176 |
| Cerebrovascular | 3 | 0.961 | 0.807 | 1.145 |
| Cerebrovascular | 4 | 1.132 | 0.961 | 1.334 |
| Cerebrovascular | 5 | 0.937 | 0.777 | 1.130 |
Using the distributed lag model for each sex strata. Also modifying code to pull out n cases analyzed.
# estimating lagged effects
ucod <- rep(cause_death, each = 24)
sex_strata <- c('F', 'M')
sex_death_results <- map_dfr(co_death_list, function(df){
results <- map_dfr(sex_strata, function(x){
data <- df %>%
filter(sex == x) %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id),
data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>%
mutate(sex = x)
return(lag_est)
})
}) %>%
# bind in outcome names
bind_cols(data.frame(ucod), .)
Sex-specific plots of the cumulative effect.
# subset to cumulative results
cumulative_sex_death_strata <- sex_death_results %>%
filter(type == 'cumulative' & time == 5) %>%
mutate(sex_long = if_else(sex == 'F', 'Female', 'Male'),
outcome = forcats::fct_relevel(ucod, cause_death),
group = as.factor(if_else(ucod %in% cause_death[1:3],
"Respiratory", "Cardiovascular"))) %>%
filter(!(outcome %in% c('Asthma', 'Cardiac Arrest')))
# plot
sex_plot <- ggplot(data=cumulative_sex_death_strata, aes(x=sex_long, y=odds_ratio,
group = group, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Sex") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank())
print(sex_plot)
Sex-specific estimates table.
knitr::kable(dplyr::select(cumulative_sex_strata, outcome, sex_long, odds_ratio:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Sex-specific cumulative effect of smoke on hosptializations.')
| outcome | sex_long | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| All Respiratory | Female | 1.005 | 0.897 | 1.125 |
| All Respiratory | Male | 1.055 | 0.939 | 1.185 |
| Asthma | Female | 1.268 | 0.945 | 1.701 |
| Asthma | Male | 1.292 | 0.954 | 1.749 |
| COPD | Female | 1.008 | 0.743 | 1.369 |
| COPD | Male | 1.095 | 0.807 | 1.487 |
| Acute Bronchitis | Female | 1.190 | 0.629 | 2.250 |
| Acute Bronchitis | Male | 1.683 | 0.937 | 3.022 |
| Pneumonia | Female | 1.005 | 0.820 | 1.232 |
| Pneumonia | Male | 1.066 | 0.863 | 1.316 |
| All CVD | Female | 1.041 | 0.951 | 1.139 |
| All CVD | Male | 1.037 | 0.952 | 1.130 |
| Arrhythmia | Female | 1.229 | 0.980 | 1.541 |
| Arrhythmia | Male | 0.950 | 0.751 | 1.203 |
| Cerebrovascular | Female | 1.188 | 0.906 | 1.557 |
| Cerebrovascular | Male | 1.178 | 0.975 | 1.423 |
| Heart Failure | Female | 1.215 | 0.919 | 1.605 |
| Heart Failure | Male | 1.185 | 0.979 | 1.434 |
| Ischemic Heart Disease | Female | 1.071 | 0.859 | 1.334 |
| Ischemic Heart Disease | Male | 1.035 | 0.833 | 1.287 |
| Myocardial Infarction | Female | 1.060 | 0.881 | 1.275 |
| Myocardial Infarction | Male | 0.883 | 0.715 | 1.090 |
# repeat cause of death name
ucod <- rep(cause_death[c(1,4)], each = 36)
# define age strata to loop through
age_strata <- c('age_under_15', 'age_15_to_65', 'age_over_65')
age_death_results <- map_dfr(co_death_list[c(1,4)], function(df){
results <- map_dfr(age_strata, function(x){
data <- df %>%
filter(age_cat == x) %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id),
data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>%
mutate(age_cat = x)
return(lag_est)
})
}) %>%
bind_cols(data.frame(ucod), .)
Age-specific plots of the cumulative effect.I’m going to cut out deaths under 15 because there aren’t many of them and the variance is high.
# subset to cumulative results
cumulative_age_death_strata <- age_death_results %>%
filter(type == 'cumulative' & time == 5) %>%
filter(age_cat != 'age_under_15') %>%
mutate(age_long = forcats::fct_relevel(
case_when(age_cat == 'age_under_15' ~ 'Age < 15',
age_cat == 'age_15_to_65' ~ 'Age 15 to 64',
age_cat == 'age_over_65' ~ 'Age > 64'),
c('Age < 15', 'Age 15 to 64', 'Age > 64')),
outcome = forcats::fct_relevel(if_else(ucod == 'Respiratory', 'Respiratory',
'Cardiovascular'),
c('Cardiovascular', 'Respiratory')))
# plot
age_plot <- ggplot(data=cumulative_age_death_strata, aes(x=age_long, y=odds_ratio,
group = outcome, color = outcome)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Age") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
legend.direction = 'horizontal',
legend.position = 'bottom',
panel.grid.minor = element_blank(),
strip.background = element_blank())
print(age_plot)
Table of age-specific associations.
knitr::kable(dplyr::select(cumulative_age_death_strata,
outcome, age_long, odds_ratio:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Age-specific associations between smoke and underlying cause of death')
| outcome | age_long | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| Respiratory | Age 15 to 64 | 0.886 | 0.487 | 1.608 |
| Respiratory | Age > 64 | 1.158 | 0.929 | 1.442 |
| Cardiovascular | Age 15 to 64 | 1.172 | 0.883 | 1.555 |
| Cardiovascular | Age > 64 | 1.021 | 0.887 | 1.176 |
The continuous definition of smoke may have some misclassification bias, particularly where there is difficulty distinguishing from PM2.5 impacted by smoke and a high anthropocentric PM2.5 day that also happens to have some smoke in the atmospheric column. I am going to try out a binary outcome of > 10 ug/m^3 PM2.5 with HMS overhead as a binary indicator for smoke and look at the association between multiple days of a binary smoke exposure and the risk of cardiopulmonary morbidity.
out_rep <- rep(outcome, each = 12)
hosp_binary_results <- lapply(co_hosp_list, function(x){
# output dataframe from list
data <- x %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gsmk10_hms_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gsmk10_hms, gsmk10_hms_lag1,
gsmk10_hms_lag2, gsmk10_hms_lag3,
gsmk10_hms_lag4, gsmk10_hms_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .)
# get the cumulative effect over 6 days
cumulative_bin_results <- filter(hosp_binary_results,
type == 'cumulative' & time == 5) %>%
dplyr::select(-strata, -time) %>%
mutate(outcome = forcats::fct_relevel(outcome, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular")))
# plot
cbin_hosp_plot <- ggplot(data = cumulative_bin_results,
aes(x=outcome, y = odds_ratio, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ug/m"^3))) +
xlab("Outcome") +
ggtitle("Hospitalization Cumulative Smoke Exposure 0 to 5 Days") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
size = 10))
print(cbin_hosp_plot)
There is a significant association with this binary cutoff and asthma over 0 to 5 days. Some of the cardiovascular outcomes that were trending towards ‘significance’ were attenuated to the null here.
cod <- rep(cause_death, each = 12)
binary_death_results <- lapply(co_death_list, function(x){
# output dataframe from list
data <- x %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gsmk10_hms, gsmk10_hms_lag1,
gsmk10_hms_lag2, gsmk10_hms_lag3,
gsmk10_hms_lag4, gsmk10_hms_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
temp_basis <-
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(cod), .)
# get the cumulative effect over 6 days
c_bin_death_results <- filter(binary_death_results,
type == 'cumulative' & time == 5) %>%
dplyr::select(-strata, -time) %>%
mutate(outcome = forcats::fct_relevel(cod, cause_death),
group = as.factor(if_else(cod %in% cause_death[1:3],
"Respiratory", "Cardiovascular"))) %>%
filter(!(outcome %in% c('Asthma', 'Cardiac Arrest')))
# plot
cbin_death_plot <- ggplot(data = c_bin_death_results,
aes(x=outcome, y = odds_ratio, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ug/m"^3))) +
xlab("Outcome") +
ggtitle("Mortality Cumulative Smoke Exposure 0 to 5 Lagged Days") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
size = 10))
print(cbin_death_plot)
No association with mortality.
Looking at specific fires. I think isolating to specific counties impacted by specific fires gives me more confidence in the kriged models to understand the exposure series. We’ve also used this approach in Washington and Oregon studies. I’m going to run separate models in each section, but plot estimates side by side in a couple plots.
Since there are fewer deaths for some of the subcategories and I’m reducing further to specific times and counties, I’m going to only look at respiratory and cardiovascular general categories of underlying cause of death.
The first fire I’ll look at is the Four-Mile Canyon Fire that occurred west of Boulder. It started on September 7, 2010 and contained on September 13th. I will limit the time frame to 2010-05-01 to 2010-10-31 and to Boulder County only. I’m open to adding additional counties known to be impacted by wildfire smoke.
out_rep <- rep(outcome, each = 12)
mile4_hosp_cont <-lapply(co_hosp_list, function(x){
# output dataframe from list
data <- x %>%
# filter to boulder
filter(fips.x == '08013') %>%
filter(date >= '2010-05-01' & date <= '2010-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .) %>%
mutate(event = 'Four Mile 2010')
cod <- rep(names(co_death_list[c(1,4)]), each=12)
mile4_death_cont <- lapply(co_death_list[c(1,4)], function(x){
# output dataframe from list
x <- co_death_list[[1]]
data <- x %>%
# filter to boulder
filter(fips.x == '08013') %>%
filter(date >= '2010-05-01' & date <= '2010-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(cod), .) %>%
mutate(event = 'Four Mile 2010')
Waldo Canyon started on June 23rd 2012 and burned until July 10th 2012 near Colorado Springs. Limiting to events from El Paso County to assess impact of smoke. Are there other counties I should consider?
Limiting hospitalizations and deaths to May 2012 through August 2012. I’m consider expanding time frame and other counties too.
out_rep <- rep(outcome, each = 12)
waldo_hosp_cont <-lapply(co_hosp_list, function(x){
# output dataframe from list
data <- x %>%
# filter to el paso
filter(fips.x == '08041') %>%
filter(date >= '2012-05-01' & date <= '2012-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .) %>%
mutate(event = 'Waldo Canyon 2012')
cod <- rep(names(co_death_list[c(1,4)]), each=12)
waldo_death_cont <- lapply(co_death_list[c(1,4)], function(x){
# output dataframe from list
data <- x %>%
# filter to el paso
filter(fips.x == '08041') %>%
filter(date >= '2012-05-01' & date <= '2012-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(cod), .) %>%
mutate(event = 'Waldo Canyon 2012')
For this fire, I’m limiting hospitalizations and deaths to May 2012 through August 2012 occurring in Larmier county.
out_rep <- rep(outcome, each = 12)
hipark_hosp_cont <-lapply(co_hosp_list, function(x){
# output dataframe from list
data <- x %>%
# filter to larimer
filter(fips.x == '08069') %>%
filter(date >= '2012-05-01' & date <= '2012-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .) %>%
mutate(event = 'High Park 2012')
cod <- rep(names(co_death_list[c(1,4)]), each=12)
hipark_death_cont <- lapply(co_death_list[c(1,4)], function(x){
# output dataframe from list
data <- x %>%
# filter to el paso
filter(fips.x == '08069') %>%
filter(date >= '2012-05-01' & date <= '2012-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(cod), .) %>%
mutate(event = 'High Park 2012')
2012 local fires for all grid cells in study domain.
out_rep <- rep(outcome, each = 12)
local_hosp_cont <-lapply(co_hosp_list, function(x){
# output dataframe from list
data <- x %>%
filter(date >= '2012-05-01' & date <= '2012-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .) %>%
mutate(event = 'Local 2012')
cod <- rep(names(co_death_list[c(1,4)]), each=12)
local_death_cont <- lapply(co_death_list[c(1,4)], function(x){
# output dataframe from list
data <- x %>%
filter(date >= '2012-05-01' & date <= '2012-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(cod), .) %>%
mutate(event = 'Local 2012')
Fires in Canada and Washington led to the transported smoke event that affected the Front Range communities in Colorado in 2015. I have limited this analysis to 2015-05-01 to 2015-10-31 (note hospitalizations only go until 2015-10-01). I included all Front Range counties.
out_rep <- rep(outcome, each = 12)
trans_hosp_cont <-lapply(co_hosp_list, function(x){
# output dataframe from list
data <- x %>%
filter(date >= '2015-05-01' & date <= '2015-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(out_rep), .) %>%
mutate(event = 'Transport 2015')
cod <- rep(names(co_death_list[c(1,4)]), each=12)
trans_death_cont <- lapply(co_death_list[c(1,4)], function(x){
# output dataframe from list
data <- x %>%
filter(date >= '2015-05-01' & date <= '2015-10-31') %>%
mutate(date = as.Date(date),
outcome = as.numeric(as.character(outcome))) %>%
# remove missing lagged data
filter(!is.na(gpm_smk10unit_lag5))
# create lagged matrix
pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1,
gpm_smk10unit_lag2, gpm_smk10unit_lag3,
gpm_smk10unit_lag4, gpm_smk10unit_lag5))
# temp matrix
temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
temp_f_grid_lag2, temp_f_grid_lag3,
temp_f_grid_lag4, temp_f_grid_lag5))
# ozone matrix
o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3,
o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
# define lagged basis spline
exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
# pm basis
pm_basis <- pm_mat %*% exp_b
# temp basis
temp_basis <- temp_mat %*% exp_b
# ozone basis
o3_basis <- o3_mat %*% exp_b
# run lagged model
lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
# estimate lag estimate
lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b)
return(lag_est)
}) %>% #end lappply
# bind rows
map_dfr(.,rbind) %>%
# bind in outcome names
bind_cols(data.frame(cod), .) %>%
mutate(event = 'Transport 2015')
Plots of association for each hospitalization outcome by event. I left out Four Mile Canyon fire since there were so few events the estimates were unstable. This can probably be fixed by adding in other counties that were impacted by smoke.
# bind event dataframes together
hosp_event <- bind_rows(waldo_hosp_cont, hipark_hosp_cont, local_hosp_cont,
trans_hosp_cont) %>%
filter(type == 'cumulative' & time == 5) %>%
dplyr::select(-strata, -time) %>%
mutate(outcome = forcats::fct_relevel(out_rep, out_order),
group = as.factor(if_else(outcome %in% out_order[1:5],
"Respiratory", "Cardiovascular")),
# ordering fire event
event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
'Local 2012', 'Transport 2012'))) %>%
# filter out acute bronchitis
filter(outcome != 'Acute Bronchitis')
# plot
hosp_event_plot <- ggplot(data = hosp_event,
aes(x=outcome, y=odds_ratio, color = group)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
facet_wrap(~event, scales = 'free_y') +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Outcome") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
legend.direction = 'horizontal',
legend.position = 'bottom',
strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
size = 10))
print(hosp_event_plot)
I think this plot is kind of interesting for asthma, where there is a positive association with the 2015 transport smoke and an inverse association with the 2012 local fires for the study domain. I don’t see an association with the individual fires, which may suggest the inverse association may be due to exposure misclassification? Or it could be real where poeple are taking risk-avoiding behaviors.I think Sheryl and I hypothesized that there might be an effect here because the first couple days of the smoke event were really hard to tell there was actual smoke and not fog. Also note that I included all front range counties because there were air quality warning up and down the front range. I think we could also do a 2012 to 2015 comparison. This might actually be the cleanest way to do this analysis, with the argument that the Front Range was impacted by smoke from two large fires in 2012 and impacted by smoke from transported smoke.
I left out Four Mile Canyon fire for now since the estimates were unstable due to few events.
Printing out fire events hospitalization table.
knitr::kable(dplyr::select(hosp_event, event, outcome, odds_ratio:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Cumulative association between smoke from specific events and hospitalizations')
| event | outcome | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| Waldo Canyon 2012 | All Respiratory | 1.155 | 0.767 | 1.741 |
| Waldo Canyon 2012 | Asthma | 0.622 | 0.203 | 1.906 |
| Waldo Canyon 2012 | COPD | 1.056 | 0.360 | 3.092 |
| Waldo Canyon 2012 | Pneumonia | 1.549 | 0.754 | 3.182 |
| Waldo Canyon 2012 | All CVD | 1.032 | 0.746 | 1.429 |
| Waldo Canyon 2012 | Arrhythmia | 0.769 | 0.301 | 1.967 |
| Waldo Canyon 2012 | Cerebrovascular | 1.007 | 0.458 | 2.214 |
| Waldo Canyon 2012 | Heart Failure | 0.988 | 0.448 | 2.182 |
| Waldo Canyon 2012 | Ischemic Heart Disease | 1.291 | 0.554 | 3.006 |
| Waldo Canyon 2012 | Myocardial Infarction | 0.713 | 0.331 | 1.534 |
| High Park 2012 | All Respiratory | 1.089 | 0.730 | 1.624 |
| High Park 2012 | Asthma | 0.635 | 0.138 | 2.917 |
| High Park 2012 | COPD | 1.107 | 0.322 | 3.812 |
| High Park 2012 | Pneumonia | 1.555 | 0.827 | 2.923 |
| High Park 2012 | All CVD | 1.020 | 0.752 | 1.384 |
| High Park 2012 | Arrhythmia | 1.209 | 0.550 | 2.656 |
| High Park 2012 | Cerebrovascular | 1.178 | 0.575 | 2.414 |
| High Park 2012 | Heart Failure | 1.331 | 0.644 | 2.750 |
| High Park 2012 | Ischemic Heart Disease | 0.954 | 0.409 | 2.226 |
| High Park 2012 | Myocardial Infarction | 1.159 | 0.623 | 2.158 |
| Local 2012 | All Respiratory | 0.944 | 0.827 | 1.079 |
| Local 2012 | Asthma | 0.711 | 0.495 | 1.022 |
| Local 2012 | COPD | 0.920 | 0.650 | 1.301 |
| Local 2012 | Pneumonia | 1.105 | 0.871 | 1.402 |
| Local 2012 | All CVD | 1.011 | 0.913 | 1.120 |
| Local 2012 | Arrhythmia | 1.079 | 0.834 | 1.396 |
| Local 2012 | Cerebrovascular | 1.132 | 0.870 | 1.472 |
| Local 2012 | Heart Failure | 1.186 | 0.908 | 1.549 |
| Local 2012 | Ischemic Heart Disease | 1.024 | 0.782 | 1.339 |
| Local 2012 | Myocardial Infarction | 0.986 | 0.784 | 1.239 |
| Transport 2015 | All Respiratory | 1.052 | 0.921 | 1.201 |
| Transport 2015 | Asthma | 1.614 | 1.154 | 2.256 |
| Transport 2015 | COPD | 0.991 | 0.669 | 1.467 |
| Transport 2015 | Pneumonia | 0.886 | 0.690 | 1.140 |
| Transport 2015 | All CVD | 1.115 | 1.011 | 1.230 |
| Transport 2015 | Arrhythmia | 1.121 | 0.857 | 1.466 |
| Transport 2015 | Cerebrovascular | 1.117 | 0.879 | 1.420 |
| Transport 2015 | Heart Failure | 1.116 | 0.875 | 1.424 |
| Transport 2015 | Ischemic Heart Disease | 1.149 | 0.905 | 1.457 |
| Transport 2015 | Myocardial Infarction | 1.114 | 0.897 | 1.384 |
Plotting underlying cause of death by fire events.
# bind event dataframes together
death_event <- bind_rows(waldo_death_cont, hipark_death_cont, local_death_cont,
trans_death_cont) %>%
filter(type == 'cumulative' & time == 5) %>%
dplyr::select(-strata, -time) %>%
mutate(outcome = as.factor(if_else(cod == 'resp',
'Respiratory', 'Cardiovascular')),
# ordering fire event
event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
'Local 2012', 'Transport 2012')))
# plot
death_event_plot <- ggplot(data = death_event,
aes(x=outcome, y=odds_ratio, color = outcome)) +
geom_point() +
geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
facet_wrap(~event) +
scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
xlab("Outcome") +
theme_bw()+
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(linetype = "dotted"),
panel.grid.minor = element_blank(),
legend.direction = 'horizontal',
legend.position = 'bottom',
strip.background = element_blank(),
axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
size = 10))
print(death_event_plot)
No association with deaths.
Table for mortality for specific events.
knitr::kable(dplyr::select(death_event, outcome, event,odds_ratio:upper_95) %>%
mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
caption = 'Cumulative association with smoke and mortality for specific events')
| outcome | event | odds_ratio | lower_95 | upper_95 |
|---|---|---|---|---|
| Respiratory | Waldo Canyon 2012 | 0.750 | 0.218 | 2.578 |
| Cardiovascular | Waldo Canyon 2012 | 1.294 | 0.678 | 2.472 |
| Respiratory | High Park 2012 | 0.220 | 0.051 | 0.950 |
| Cardiovascular | High Park 2012 | 0.770 | 0.409 | 1.447 |
| Respiratory | Local 2012 | 0.998 | 0.702 | 1.419 |
| Cardiovascular | Local 2012 | 1.086 | 0.878 | 1.343 |
| Respiratory | Transport 2015 | 1.024 | 0.741 | 1.415 |
| Cardiovascular | Transport 2015 | 0.969 | 0.796 | 1.180 |
I think there could be something interesting looking at 2012 vs. 2015 smoke, where transported smoke could be a risk factor. The hypothesis here could be that people are aware of smoke from local fires since they are publicized, where the transported smoke was not clear to people until there were air quality warnings. Mortality doesn’t seem to be associated with these short-term smoke events.